# Load the pCRM evaluation tables for the three post-processing variants
# ("none", "median" amplitude cutoff, "elbow" amplitude cutoff).
top_none <- read.table(
  "~/Box/SCRMshaw_HD_all29Sets_allMethods_Jan2020/pCRMeval/top_noneAmplCurve/output_top_none_oldAllSetsAllMethods.bed",
  header = TRUE
)
top_median_imm <- read.table(
  "~/Box/SCRMshaw_HD_all29Sets_allMethods_Jan2020/pCRMeval/medianAmplitudeCurve/output_topMedianAmp_allold29sets_IMM.bed",
  header = TRUE
)
top_elbow_imm <- read.table(
  "~/Box/SCRMshaw_HD_all29Sets_allMethods_Jan2020/pCRMeval/elbowAmplitudeCurve/output_topAll29_elbow_IMM.bed",
  header = TRUE
)

# The "none" table holds all methods; keep only the IMM rows so it is
# comparable with the two IMM-only tables above.
top_none_imm <- subset(top_none, top_none$Method == "imm")

# Side-by-side boxplots (values scaled by 100) of three measures, each for the
# none / median / elbow variants. Colour coding: orange = none, red = median,
# blue = elbow.
# NOTE(review): the title says "new74sets" although the input paths refer to
# the old 29 sets -- confirm the title is intended.
boxplot(
  top_none_imm$PercentageTrainingSetSensitivity * 100,
  top_median_imm$PercentageTrainingSetSensitivity * 100,
  top_elbow_imm$PercentageTrainingSetSensitivity * 100,
  top_none_imm$PercentageRedflyRecovered * 100,
  top_median_imm$PercentageRedflyRecovered * 100,
  top_elbow_imm$PercentageRedflyRecovered * 100,
  top_none_imm$percentageExpressionPatternPrecision * 100,
  top_median_imm$percentageExpressionPatternPrecision * 100,
  top_elbow_imm$percentageExpressionPatternPrecision * 100,
  names = c("TS_None", "TS_Med", "TS_Elb", "RR_None", "RR_Med", "RR_Elb", "EP_None", "EP_Med", "EP_Elb"),
  main = "Percentage_comparison_3methods_allmeasures_new74sets",
  boxwex = 0.6,
  col = c("orange", "red", "blue", "orange", "red", "blue", "orange", "red", "blue"),
  ylim = c(0, 100)
)
# This call was fused onto the closing parenthesis of boxplot(), which does
# not parse; it must be its own statement.
library(ggplot2)
#Training set Sensitivity
# One panel per training set; x = SCRMs, y = training-set sensitivity scaled
# by 100, one point layer per post-processing variant.
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "None"), shape = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "Median"), shape = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "Elbow"), shape = 8) +
  # Named values pin each label to its colour. Unnamed values are matched to
  # the labels in alphabetical order (Elbow, Median, None), which drew Elbow
  # in orange and None in blue -- the opposite of the boxplot colour coding
  # used earlier in this file (orange = none, red = median, blue = elbow).
  scale_colour_manual(values = c("None" = "orange", "Median" = "red", "Elbow" = "blue")) +
  ylim(0, 100) +
  theme_bw()
#Redfly Recovery
# One panel per training set; x = SCRMs, y = PercentageRedflyRecovered scaled
# by 100, one point layer per post-processing variant.
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "None"), shape = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "Median"), shape = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "Elbow"), shape = 8) +
  # Named values pin each label to its colour. Unnamed values are matched to
  # the labels in alphabetical order (Elbow, Median, None), which drew Elbow
  # in orange and None in blue -- the opposite of the boxplot colour coding
  # used earlier in this file (orange = none, red = median, blue = elbow).
  scale_colour_manual(values = c("None" = "orange", "Median" = "red", "Elbow" = "blue")) +
  ylim(0, 100) +
  theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).
#Pattern Precision
# One panel per training set; x = SCRMs, y = expression-pattern precision
# scaled by 100, one point layer per post-processing variant.
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "None"), shape = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "Median"), shape = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "Elbow"), shape = 8) +
  ylim(0, 100) +
  # Named values pin each label to its colour. Unnamed values are matched to
  # the labels in alphabetical order (Elbow, Median, None), which drew Elbow
  # in orange and None in blue -- the opposite of the boxplot colour coding
  # used earlier in this file (orange = none, red = median, blue = elbow).
  scale_colour_manual(values = c("None" = "orange", "Median" = "red", "Elbow" = "blue")) +
  theme_bw()
#Pattern Precision permuted comparison
# One panel per training set; x = SCRMs, y = expression-pattern recall scaled
# by 100, one point layer per post-processing variant.
# NOTE(review): the "None" layer plots percentageExpressionPatternRecall while
# the "Median" and "Elbow" layers plot
# ExpectedpercentageExpressionPatternRecall, and the preceding heading says
# "Precision" although the columns are named "Recall" -- confirm which columns
# are intended. The columns are left as they were.
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = percentageExpressionPatternRecall * 100, colour = "None"), shape = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = ExpectedpercentageExpressionPatternRecall * 100, colour = "Median"), shape = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = ExpectedpercentageExpressionPatternRecall * 100, colour = "Elbow"), shape = 8) +
  ylim(0, 100) +
  # Named values pin each label to its colour. Unnamed values are matched to
  # the labels in alphabetical order (Elbow, Median, None), which drew Elbow
  # in orange and None in blue -- the opposite of the boxplot colour coding
  # used earlier in this file (orange = none, red = median, blue = elbow).
  scale_colour_manual(values = c("None" = "orange", "Median" = "red", "Elbow" = "blue")) +
  theme_bw()
#random 29
# Build the actual-vs-random comparison table for the MEDIAN cutoff results.
# NOTE(review): hard-coded absolute user path; the other inputs in this file
# use "~/Box/..." -- consider making this consistent.
top_fake <- read.table(
  "/Users/hasibaasma/Box/complete_data_representation_newMethod/files/del_randomAvg62sets_28times_Excel_withSpec.bed",
  header = TRUE
)
top_orig_oldSets_Med <- top_median_imm

# Sort both tables by training-set name.
# NOTE(review): the rows of s1 and s2 are paired purely by position below, so
# this assumes both tables list the same training sets the same number of
# times -- verify against the input files.
s1 <- top_orig_oldSets_Med[order(top_orig_oldSets_Med$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  # Column 9: actual expression-pattern recall minus random specificity.
  s1$percentageExpressionPatternRecall - s2$PercentageSpecificity,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  # Columns 15 and 16: actual minus random.
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)

# Give readable names to the columns that later code refers to by name; the
# remaining columns keep their auto-generated names.
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)
## Ranges of the actual-vs-random differences, to quote in the observations.
## For one difference column, apply `stat` to the rows of every training set
## and scale the result by 100. Returns a list named by TsetName, in order of
## first appearance in the table.
diff_stat_by_tset <- function(diff_column, stat) {
  diff_table <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tset_names <- as.character(unique(diff_table$TsetName))
  stats <- lapply(tset_names, function(tset) {
    stat(diff_table[[diff_column]][diff_table$TsetName == tset]) * 100
  })
  names(stats) <- tset_names
  stats
}

# REDfly recovery
minimum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", min)
maximum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", max)
median_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", median)

# Training-set recovery
minimum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", median)

# Specificity
minimum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", median)
library(qdapTools)
# Turn every per-training-set list into a two-column data frame: the summary
# value (first column, labelled below) and the training-set name ("TsetName").

# REDfly recovery
minred <- list2df(
  minimum_differenceBetweenActualRandomRedflyRecovery,
  "minimum difference to random in redfly recovery", "TsetName"
)
medred <- list2df(
  median_differenceBetweenActualRandomRedflyRecovery,
  "median difference to random in redfly recovery", "TsetName"
)
maxred <- list2df(
  maximum_differenceBetweenActualRandomRedflyRecovery,
  "maximum difference to random in redfly recovery", "TsetName"
)

# Training-set sensitivity
minTset <- list2df(
  minimum_DifferenceBetweenActualTsetRecovery,
  "minimum difference to random in Tset sensitivity", "TsetName"
)
maxTset <- list2df(
  maximum_DifferenceBetweenActualTsetRecovery,
  "maximum difference to random in Tset sensitivity", "TsetName"
)
medTset <- list2df(
  median_DifferenceBetweenActualTsetRecovery,
  "median difference to random in Tset sensitivity", "TsetName"
)

# Specificity
minSpec <- list2df(
  minimum_specificity,
  "minimum difference to random in specificity", "TsetName"
)
maxSpec <- list2df(
  maximum_specificity,
  "maximum difference to random in specificity", "TsetName"
)
medSpec <- list2df(
  median_specificity,
  "median difference to random in specificity", "TsetName"
)

# Only the three median tables go into the final table, joined on TsetName.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)

# Save the median-cutoff summary as a tab-separated file.
write.table(
  dffinal,
  file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/oldTsets/finaltable_medianAmplitudeCurve_old29tsets.txt",
  sep = "\t"
)
library(knitr)
library(kableExtra)
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:qdapTools':
##
## id
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Conditions being set for these good Tsets include:
# Placeholders; neither list is filled in this section.
finaltable <- list()
goodTsetsOverAll <- list()

# Working copy of the final table with short names for use in filter().
dffinalDF <- as.data.frame(dffinal)
names(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceInTsetSpecificity"
)

# Good overall: all three median differences reach their thresholds
# (REDfly recovery >= 8, Tset sensitivity >= 10, Tset specificity >= 8).
goodTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 10 &
    MedianDifferenceInTsetSpecificity >= 8
)

# Switch to the display headers and keep the four columns in order.
names(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetOverAll <- goodTsetOverAll[1:4]

kable_input3 <- kable(goodTsetOverAll, digits = 2, caption = "Overall Good Training Sets")
#column_spec(kable_input3,2:10,width = "1cm")
kable_input3
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
#goodTsetOverAll
Conditions being set for these good Tsets (if we ignore specificity) include:
# Good when specificity is ignored: median difference to random is at least
# 10 for both REDfly recovery and Tset sensitivity.
goodTsetsIgnoringSpecificity <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 10 &
    MedianDifferenceInTsetSensitivity >= 10
)

# Switch to the display headers and keep the four columns in order.
names(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- goodTsetsIgnoringSpecificity[1:4]

kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.blastoderm | 17.59 | 10.83 | 5.85 |
| mapping1.glia | 12.33 | 58.39 | -1.97 |
| mapping1.malpighian_tubules | 10.51 | 41.72 | -7.70 |
| mapping1.tracheal_system | 17.34 | 34.58 | -3.49 |
| mapping1.ventral_ectoderm | 11.32 | 36.16 | -6.08 |
| mapping1.visceral_mesoderm | 23.12 | 22.67 | -5.28 |
| mapping2.glia | 17.97 | 58.39 | -1.25 |
These are the sets that fall into neither the very good nor the very bad category, mainly because they perform well on two measures but not on the third. Sets already categorized as good are excluded.
Specific conditions being set for these Intermediate sets include:
# Intermediate sets: a set qualifies when it meets any one of three relaxed
# threshold combinations (each combination relaxes one of the three measures).
intermediateTSets <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 10 &
    MedianDifferenceInTsetSpecificity >= 0) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
      MedianDifferenceInTsetSensitivity >= 10 &
      MedianDifferenceInTsetSpecificity >= 5) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 7 &
      MedianDifferenceInTsetSensitivity >= 0 &
      MedianDifferenceInTsetSpecificity >= 5)
)

# Drop sets already classified as good overall.
# The rows are selected in one indexing step instead of a
# `for (i in 1:length(idx))` loop: when no row qualifies, 1:length(idx) is
# c(1, 0) and the loop appended a spurious all-NA row to the table.
onlyintermediateTSetsNum <- which(!(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName))
onlyintermediateTSets <- intermediateTSets[onlyintermediateTSetsNum, ]
intermediateTSets <- onlyintermediateTSets

# Switch to the display headers and keep the four columns in order.
colnames(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- intermediateTSets[1:4]

# Note: this reuses (overwrites) kable_input4.
kable_input4 <- kable(intermediateTSets, digits = 2, caption = "Intermediate Tsets")
#column_spec(kable_input4,2:10,width = "3cm")
kable_input4
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.blastoderm | 17.59 | 10.83 | 5.85 |
A set is classified as poor if it fulfills any of the following conditions:
# Poor Tsets: at least one of the three median differences to random is zero
# or negative.
poorTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery <= 0 |
    MedianDifferenceInTsetSensitivity <= 0 |
    MedianDifferenceInTsetSpecificity <= 0
)

# Switch to the display headers and keep the four columns in order.
names(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- poorTsetOverAll[1:4]

kable_input6 <- kable(poorTsetOverAll, digits = 2, caption = "Overall Poor Training Sets")
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.adult_mesoderm | 8.53 | 20.89 | -3.92 |
| mapping1.amnioserosa | -0.56 | 33.39 | -8.14 |
| mapping1.cns | 0.48 | -9.31 | -8.15 |
| mapping1.dorsal_ectoderm | 6.12 | 43.00 | -5.91 |
| mapping1.ectoderm | 9.89 | 9.33 | -6.50 |
| mapping1.endoderm | 17.95 | 3.21 | -7.34 |
| mapping1.eye | 4.80 | 33.39 | -7.76 |
| mapping1.female_gonad | -8.96 | -3.15 | -6.64 |
| mapping1.glia | 12.33 | 58.39 | -1.97 |
| mapping1.imaginal_disc | 6.14 | -9.15 | -5.96 |
| mapping1.male_gonad | -3.17 | 5.05 | -9.58 |
| mapping1.malpighian_tubules | 10.51 | 41.72 | -7.70 |
| mapping1.mesectoderm | 7.93 | 58.39 | -6.13 |
| mapping1.mesoderm | 0.25 | 23.77 | -5.90 |
| mapping1.neuroectoderm | 7.29 | 36.16 | -4.78 |
| mapping1.pns | 3.71 | 6.66 | -7.46 |
| mapping1.salivary_gland | 23.03 | 1.24 | -5.26 |
| mapping1.somatic_muscle | 8.84 | 5.61 | -4.00 |
| mapping1.tracheal_system | 17.34 | 34.58 | -3.49 |
| mapping1.ventral_ectoderm | 11.32 | 36.16 | -6.08 |
| mapping1.visceral_mesoderm | 23.12 | 22.67 | -5.28 |
| mapping2.ectoderm | 3.07 | 0.28 | -6.39 |
| mapping2.eye | 3.15 | 11.96 | -7.62 |
| mapping2.glia | 17.97 | 58.39 | -1.25 |
| mapping2.mesoderm | 15.81 | -13.61 | -6.35 |
| mapping2.neuronal | -3.80 | -10.69 | -8.32 |
| mapping2.reproductive_system | 12.95 | -18.28 | -9.58 |
| mapping2.wing | 8.93 | 6.01 | -5.97 |
#random 29
# Build the actual-vs-random comparison table for the ELBOW cutoff results.
# NOTE(review): hard-coded absolute user path; the other inputs in this file
# use "~/Box/..." -- consider making this consistent.
top_fake <- read.table(
  "/Users/hasibaasma/Box/complete_data_representation_newMethod/files/del_randomAvg62sets_28times_Excel_withSpec.bed",
  header = TRUE
)
# NOTE(review): the variable keeps its "_Med" name although it now holds the
# elbow results.
top_orig_oldSets_Med <- top_elbow_imm

# Sort both tables by training-set name.
# NOTE(review): the rows of s1 and s2 are paired purely by position below, so
# this assumes both tables list the same training sets the same number of
# times -- verify against the input files.
s1 <- top_orig_oldSets_Med[order(top_orig_oldSets_Med$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  # Column 9: actual expression-pattern recall minus random specificity.
  s1$percentageExpressionPatternRecall - s2$PercentageSpecificity,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  # Columns 15 and 16: actual minus random.
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)

# Give readable names to the columns that later code refers to by name; the
# remaining columns keep their auto-generated names.
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)
## Ranges of the actual-vs-random differences, to quote in the observations.
## For one difference column, apply `stat` to the rows of every training set
## and scale the result by 100. Returns a list named by TsetName, in order of
## first appearance in the table.
diff_stat_by_tset <- function(diff_column, stat) {
  diff_table <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tset_names <- as.character(unique(diff_table$TsetName))
  stats <- lapply(tset_names, function(tset) {
    stat(diff_table[[diff_column]][diff_table$TsetName == tset]) * 100
  })
  names(stats) <- tset_names
  stats
}

# REDfly recovery
minimum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", min)
maximum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", max)
median_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", median)

# Training-set recovery
minimum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", median)

# Specificity
minimum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", median)
library(qdapTools)
# Turn every per-training-set list into a two-column data frame: the summary
# value (first column, labelled below) and the training-set name ("TsetName").

# REDfly recovery
minred <- list2df(
  minimum_differenceBetweenActualRandomRedflyRecovery,
  "minimum difference to random in redfly recovery", "TsetName"
)
medred <- list2df(
  median_differenceBetweenActualRandomRedflyRecovery,
  "median difference to random in redfly recovery", "TsetName"
)
maxred <- list2df(
  maximum_differenceBetweenActualRandomRedflyRecovery,
  "maximum difference to random in redfly recovery", "TsetName"
)

# Training-set sensitivity
minTset <- list2df(
  minimum_DifferenceBetweenActualTsetRecovery,
  "minimum difference to random in Tset sensitivity", "TsetName"
)
maxTset <- list2df(
  maximum_DifferenceBetweenActualTsetRecovery,
  "maximum difference to random in Tset sensitivity", "TsetName"
)
medTset <- list2df(
  median_DifferenceBetweenActualTsetRecovery,
  "median difference to random in Tset sensitivity", "TsetName"
)

# Specificity
minSpec <- list2df(
  minimum_specificity,
  "minimum difference to random in specificity", "TsetName"
)
maxSpec <- list2df(
  maximum_specificity,
  "maximum difference to random in specificity", "TsetName"
)
medSpec <- list2df(
  median_specificity,
  "median difference to random in specificity", "TsetName"
)

# Only the three median tables go into the final table, joined on TsetName.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)

# Save the elbow-cutoff summary as a tab-separated file.
write.table(
  dffinal,
  file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/oldTsets/finaltable_elbowAmplitudeCurve_old29tsets.txt",
  sep = "\t"
)
library(knitr)
library(kableExtra)
library(dplyr)
Conditions being set for these good Tsets include:
# Placeholders; neither list is filled in this section.
finaltable <- list()
goodTsetsOverAll <- list()

# Working copy of the final table with short names for use in filter().
dffinalDF <- as.data.frame(dffinal)
names(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceInTsetSpecificity"
)

# Good overall: all three median differences reach their thresholds
# (REDfly recovery >= 8, Tset sensitivity >= 10, Tset specificity >= 8).
goodTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 10 &
    MedianDifferenceInTsetSpecificity >= 8
)

# Switch to the display headers and keep the four columns in order.
names(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetOverAll <- goodTsetOverAll[1:4]

kable_input3 <- kable(goodTsetOverAll, digits = 2, caption = "Overall Good Training Sets")
#column_spec(kable_input3,2:10,width = "2cm")
kable_input3
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
Conditions being set for these good Tsets(if we ignore specificity) include:
# Good when specificity is ignored: median difference to random is at least
# 10 for both REDfly recovery and Tset sensitivity.
goodTsetsIgnoringSpecificity <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 10 &
    MedianDifferenceInTsetSensitivity >= 10
)

# Switch to the display headers and keep the four columns in order.
names(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- goodTsetsIgnoringSpecificity[1:4]

kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.adult_mesoderm | 54.97 | 20.89 | -5.81 |
| mapping1.amnioserosa | 84.25 | 33.39 | -8.50 |
| mapping1.dorsal_ectoderm | 41.76 | 35.31 | -7.89 |
| mapping1.eye | 61.63 | 33.39 | -5.95 |
| mapping1.glia | 43.14 | 58.39 | -1.97 |
| mapping1.malpighian_tubules | 21.82 | 25.05 | -8.64 |
| mapping1.mesectoderm | 47.39 | 58.39 | -5.27 |
| mapping1.mesoderm | 13.56 | 12.23 | -7.48 |
| mapping1.neuroectoderm | 49.97 | 36.16 | -7.98 |
| mapping1.tracheal_system | 30.38 | 29.82 | -6.00 |
| mapping1.ventral_ectoderm | 26.29 | 30.61 | -7.66 |
| mapping1.visceral_mesoderm | 40.18 | 11.96 | -6.23 |
| mapping2.glia | 48.66 | 58.39 | -2.29 |
These are the sets that fall into neither the very good nor the very bad category, mainly because they perform well on two measures but not on the third. Sets already categorized as good are excluded.
Specific conditions being set for these Intermediate sets include:
# Intermediate sets: a set qualifies when it meets any one of three relaxed
# threshold combinations (each combination relaxes one of the three measures).
intermediateTSets <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 10 &
    MedianDifferenceInTsetSpecificity >= 0) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
      MedianDifferenceInTsetSensitivity >= 10 &
      MedianDifferenceInTsetSpecificity >= 5) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 7 &
      MedianDifferenceInTsetSensitivity >= 0 &
      MedianDifferenceInTsetSpecificity >= 5)
)

# Drop sets already classified as good overall.
# The rows are selected in one indexing step instead of a
# `for (i in 1:length(idx))` loop: when no row qualifies, 1:length(idx) is
# c(1, 0) and the loop appended a spurious all-NA row to the table.
onlyintermediateTSetsNum <- which(!(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName))
onlyintermediateTSets <- intermediateTSets[onlyintermediateTSetsNum, ]
intermediateTSets <- onlyintermediateTSets

# Switch to the display headers and keep the four columns in order.
colnames(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- intermediateTSets[1:4]

# Note: this reuses (overwrites) kable_input4.
kable_input4 <- kable(intermediateTSets, digits = 2, caption = "Intermediate Tsets")
#column_spec(kable_input4,2:10,width = "3cm")
kable_input4
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff | |
|---|---|---|---|---|
| NA | NA | NA | NA | NA |
A set is classified as poor if it fulfills any of the following conditions:
# Poor Tsets: at least one of the three median differences to random is zero
# or negative.
poorTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery <= 0 |
    MedianDifferenceInTsetSensitivity <= 0 |
    MedianDifferenceInTsetSpecificity <= 0
)

# Switch to the display headers and keep the four columns in order.
names(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- poorTsetOverAll[1:4]

kable_input6 <- kable(poorTsetOverAll, digits = 2, caption = "Overall Poor Training Sets")
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6
| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.adult_mesoderm | 54.97 | 20.89 | -5.81 |
| mapping1.amnioserosa | 84.25 | 33.39 | -8.50 |
| mapping1.cns | 0.49 | -20.07 | -8.82 |
| mapping1.dorsal_ectoderm | 41.76 | 35.31 | -7.89 |
| mapping1.ectoderm | 10.20 | -9.54 | -7.92 |
| mapping1.endoderm | 51.79 | -3.68 | -9.17 |
| mapping1.eye | 61.63 | 33.39 | -5.95 |
| mapping1.female_gonad | 5.26 | -10.84 | -6.64 |
| mapping1.glia | 43.14 | 58.39 | -1.97 |
| mapping1.imaginal_disc | 12.33 | -16.94 | -6.62 |
| mapping1.male_gonad | 21.82 | 5.05 | -7.83 |
| mapping1.malpighian_tubules | 21.82 | 25.05 | -8.64 |
| mapping1.mesectoderm | 47.39 | 58.39 | -5.27 |
| mapping1.mesoderm | 13.56 | 12.23 | -7.48 |
| mapping1.neuroectoderm | 49.97 | 36.16 | -7.98 |
| mapping1.pns | 11.43 | -3.68 | -7.79 |
| mapping1.salivary_gland | 54.58 | -5.90 | -7.42 |
| mapping1.somatic_muscle | 8.89 | -2.72 | -6.33 |
| mapping1.tracheal_system | 30.38 | 29.82 | -6.00 |
| mapping1.ventral_ectoderm | 26.29 | 30.61 | -7.66 |
| mapping1.visceral_mesoderm | 40.18 | 11.96 | -6.23 |
| mapping2.ectoderm | 4.16 | -5.13 | -7.46 |
| mapping2.eye | 19.36 | 1.24 | -8.93 |
| mapping2.glia | 48.66 | 58.39 | -2.29 |
| mapping2.mesoderm | 28.77 | -17.61 | -7.18 |
| mapping2.neuronal | 8.06 | -20.99 | -9.17 |
| mapping2.reproductive_system | 34.03 | -21.61 | -8.86 |
| mapping2.wing | 9.88 | -3.52 | -2.96 |
#random 29
# Build the actual-vs-random comparison table for the results with NO
# amplitude cutoff.
# NOTE(review): hard-coded absolute user path; the other inputs in this file
# use "~/Box/..." -- consider making this consistent.
top_fake <- read.table(
  "/Users/hasibaasma/Box/complete_data_representation_newMethod/files/del_randomAvg62sets_28times_Excel_withSpec.bed",
  header = TRUE
)
# NOTE(review): the variable keeps its "_Med" name although it now holds the
# "none" results.
top_orig_oldSets_Med <- top_none_imm

# Sort both tables by training-set name.
# NOTE(review): the rows of s1 and s2 are paired purely by position below, so
# this assumes both tables list the same training sets the same number of
# times -- verify against the input files.
s1 <- top_orig_oldSets_Med[order(top_orig_oldSets_Med$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  # Column 9: actual expression-pattern recall minus random specificity.
  s1$percentageExpressionPatternRecall - s2$PercentageSpecificity,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  # Columns 15 and 16: actual minus random.
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)

# Give readable names to the columns that later code refers to by name; the
# remaining columns keep their auto-generated names.
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)
## Ranges of the actual-vs-random differences, to quote in the observations.
## For one difference column, apply `stat` to the rows of every training set
## and scale the result by 100. Returns a list named by TsetName, in order of
## first appearance in the table.
diff_stat_by_tset <- function(diff_column, stat) {
  diff_table <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tset_names <- as.character(unique(diff_table$TsetName))
  stats <- lapply(tset_names, function(tset) {
    stat(diff_table[[diff_column]][diff_table$TsetName == tset]) * 100
  })
  names(stats) <- tset_names
  stats
}

# REDfly recovery
minimum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", min)
maximum_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", max)
median_differenceBetweenActualRandomRedflyRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomRedflyRecovery", median)

# Training-set recovery
minimum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <-
  diff_stat_by_tset("DifferenceBetweenActualRandomTsetRecovery", median)

# Specificity
minimum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <-
  diff_stat_by_tset("DifferenceBetweenActualRandomPercentageSpecificity", median)
library(qdapTools)
# Turn every per-training-set list into a two-column data frame: the summary
# value (first column, labelled below) and the training-set name ("TsetName").

# REDfly recovery
minred <- list2df(
  minimum_differenceBetweenActualRandomRedflyRecovery,
  "minimum difference to random in redfly recovery", "TsetName"
)
medred <- list2df(
  median_differenceBetweenActualRandomRedflyRecovery,
  "median difference to random in redfly recovery", "TsetName"
)
maxred <- list2df(
  maximum_differenceBetweenActualRandomRedflyRecovery,
  "maximum difference to random in redfly recovery", "TsetName"
)

# Training-set sensitivity
minTset <- list2df(
  minimum_DifferenceBetweenActualTsetRecovery,
  "minimum difference to random in Tset sensitivity", "TsetName"
)
maxTset <- list2df(
  maximum_DifferenceBetweenActualTsetRecovery,
  "maximum difference to random in Tset sensitivity", "TsetName"
)
medTset <- list2df(
  median_DifferenceBetweenActualTsetRecovery,
  "median difference to random in Tset sensitivity", "TsetName"
)

# Specificity
minSpec <- list2df(
  minimum_specificity,
  "minimum difference to random in specificity", "TsetName"
)
maxSpec <- list2df(
  maximum_specificity,
  "maximum difference to random in specificity", "TsetName"
)
medSpec <- list2df(
  median_specificity,
  "median difference to random in specificity", "TsetName"
)

# Only the three median tables go into the final table, joined on TsetName.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)

# Save the no-cutoff summary as a tab-separated file.
write.table(
  dffinal,
  file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/oldTsets/finaltable_noneAmplitudeCurve_old29tsets.txt",
  sep = "\t"
)
library(knitr)
library(kableExtra)
library(dplyr)

Conditions being set for these good Tsets include:
finaltable <- list()
#
# Overall good training sets: every median difference must reach its cutoff
# (REDfly recovery >= 8, sensitivity >= 10, specificity >= 8).
dffinalDF <- as.data.frame(dffinal)
colnames(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceInTsetSpecificity"
)
goodTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 10 &
    MedianDifferenceInTsetSpecificity >= 8
)
goodTsetsOverAll <- list()
# Replace the internal column names with the labels shown in the report.
colnames(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetOverAll <- cbind(
  goodTsetOverAll[1],
  goodTsetOverAll[2],
  goodTsetOverAll[3],
  goodTsetOverAll[4]
)
kable_input3 <- kable(
  goodTsetOverAll,
  digits = 2,
  caption = "Overall Good Training Sets"
)
#column_spec(kable_input3,2:10,width = "2cm")
kable_input3

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|

Conditions being set for these good Tsets (if we ignore specificity) include:
# Good training sets when specificity is ignored: REDfly recovery and
# sensitivity must both be at least 10 above random.
goodTsetsIgnoringSpecificity <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 10 &
    MedianDifferenceInTsetSensitivity >= 10
)
colnames(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- cbind(
  goodTsetsIgnoringSpecificity[1],
  goodTsetsIgnoringSpecificity[2],
  goodTsetsIgnoringSpecificity[3],
  goodTsetsIgnoringSpecificity[4]
)
kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.glia | 10.34 | 58.39 | -1.97 |
| mapping1.tracheal_system | 13.41 | 39.34 | -1.34 |
| mapping1.visceral_mesoderm | 18.26 | 26.24 | -2.88 |
| mapping2.glia | 10.09 | 58.39 | -1.25 |
These are the sets that fall into neither the very good nor the very bad category, mainly because they perform well in two measures but not in the third one. Sets that were already categorized as good are excluded.
Specific conditions being set for these Intermediate sets include:
# Intermediate training sets: a set qualifies when it satisfies any one of
# three threshold combinations over the three median differences.
intermediateTSets <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
     MedianDifferenceInTsetSensitivity >= 10 &
     MedianDifferenceInTsetSpecificity >= 0) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
       MedianDifferenceInTsetSensitivity >= 10 &
       MedianDifferenceInTsetSpecificity >= 5) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 7 &
       MedianDifferenceInTsetSensitivity >= 0 &
       MedianDifferenceInTsetSpecificity >= 5)
)

# Drop the sets already reported as overall good. Rows are selected in one
# indexing step: the previous `for (i in 1:length(...))` loop iterated over
# c(1, 0) when nothing was left to keep and appended an all-NA row.
onlyintermediateTSetsNum <- which(
  !(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName)
)
onlyintermediateTSets <- intermediateTSets[onlyintermediateTSetsNum, , drop = FALSE]
intermediateTSets <- onlyintermediateTSets

# Replace the internal column names with the labels shown in the report.
colnames(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- cbind(
  intermediateTSets[1],
  intermediateTSets[2],
  intermediateTSets[3],
  intermediateTSets[4]
)
kable_input4 <- kable(intermediateTSets, digits = 2, caption = "Intermediate Tsets")
#column_spec(kable_input4,2:10,width = "3cm")
kable_input4

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.blastoderm | 5.56 | 18.14 | 10.17 |
Conditions being set for these poor sets: a set is classified as poor if it fulfills any of the following conditions.
# Poor training sets: at least one of the three median differences is at or
# below zero.
poorTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery <= 0 |
    MedianDifferenceInTsetSensitivity <= 0 |
    MedianDifferenceInTsetSpecificity <= 0
)
colnames(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- cbind(
  poorTsetOverAll[1],
  poorTsetOverAll[2],
  poorTsetOverAll[3],
  poorTsetOverAll[4]
)
kable_input6 <- kable(
  poorTsetOverAll,
  digits = 2,
  caption = "Overall Poor Training Sets"
)
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| mapping1.adult_mesoderm | 3.86 | 20.89 | -3.92 |
| mapping1.amnioserosa | -0.65 | 33.39 | -8.14 |
| mapping1.cns | 1.17 | 1.46 | -6.46 |
| mapping1.dorsal_ectoderm | 3.59 | 50.69 | -5.35 |
| mapping1.ectoderm | 6.08 | 14.99 | -4.75 |
| mapping1.endoderm | 11.88 | 6.66 | -6.25 |
| mapping1.eye | -0.22 | 33.39 | -7.76 |
| mapping1.female_gonad | -11.71 | 4.54 | -5.17 |
| mapping1.glia | 10.34 | 58.39 | -1.97 |
| mapping1.imaginal_disc | 6.70 | 10.34 | -4.65 |
| mapping1.male_gonad | -6.34 | 5.05 | -7.83 |
| mapping1.malpighian_tubules | 5.54 | 41.72 | -7.70 |
| mapping1.mesectoderm | 9.94 | 58.39 | -5.27 |
| mapping1.mesoderm | -0.24 | 31.46 | -3.79 |
| mapping1.neuroectoderm | 1.03 | 36.16 | -4.78 |
| mapping1.pns | -0.35 | 15.28 | -6.15 |
| mapping1.salivary_gland | 18.50 | 1.24 | -3.10 |
| mapping1.somatic_muscle | 8.27 | 8.39 | -0.75 |
| mapping1.tracheal_system | 13.41 | 39.34 | -1.34 |
| mapping1.ventral_ectoderm | 6.77 | 36.16 | -5.03 |
| mapping1.visceral_mesoderm | 18.26 | 26.24 | -2.88 |
| mapping2.ectoderm | 0.46 | 11.09 | -4.03 |
| mapping2.eye | 0.29 | 11.96 | -6.97 |
| mapping2.glia | 10.09 | 58.39 | -1.25 |
| mapping2.mesoderm | 11.28 | -4.61 | -4.04 |
| mapping2.neuronal | -2.12 | -4.50 | -7.30 |
| mapping2.reproductive_system | 10.57 | -11.61 | -8.86 |
| mapping2.wing | 7.88 | 20.29 | -2.96 |
New 74 training sets constructed in January 2020
# Load the evaluation output for the new 74 training sets under the three
# post-processing settings (none, median, elbow).
top_none <- read.table(
  "~/Box/NewTrainingSetsJan2020/pCRMeval_June20_postProcM/None/output_topNone_allNew74sets_IMM.bed",
  header = TRUE
)
top_median_imm <- read.table(
  "~/Box/NewTrainingSetsJan2020/pCRMeval_June20_postProcM/Median/output_topMed_allNew74sets_IMM.bed",
  header = TRUE
)
#top_median_imm <- read.table("~/Box/output_topMedianAmp_allold29sets_IMM copy.bed",header = TRUE)
top_elbow_imm <- read.table(
  "~/Box/NewTrainingSetsJan2020/pCRMeval_June20_postProcM/elbow/output_topElbow_allNew74sets_IMM.bed",
  header = TRUE
)
#top_elbow_imm <- read.table("~/Box/output_topAll29_elbow copy.bed",header = TRUE)

# Keep only the rows produced by the "imm" method.
top_none_imm <- subset(top_none, top_none$Method == "imm")

# One box per measure and post-processing setting, all scaled to percent.
# TS = training-set sensitivity, RR = REDfly recovered,
# EP = expression-pattern precision.
boxplot(
  top_none_imm$PercentageTrainingSetSensitivity * 100,
  top_median_imm$PercentageTrainingSetSensitivity * 100,
  top_elbow_imm$PercentageTrainingSetSensitivity * 100,
  top_none_imm$PercentageRedflyRecovered * 100,
  top_median_imm$PercentageRedflyRecovered * 100,
  top_elbow_imm$PercentageRedflyRecovered * 100,
  top_none_imm$percentageExpressionPatternPrecision * 100,
  top_median_imm$percentageExpressionPatternPrecision * 100,
  top_elbow_imm$percentageExpressionPatternPrecision * 100,
  names = c("TS_None", "TS_Med", "TS_Elb", "RR_None", "RR_Med", "RR_Elb", "EP_None", "EP_Med", "EP_Elb"),
  main = "Percentage_comparison_3methods_allmeasures_new74sets",
  boxwex = 0.6,
  col = c("orange", "red", "blue", "orange", "red", "blue", "orange", "red", "blue"),
  ylim = c(0, 100)
)
# This call used to be fused onto the end of the boxplot() line, which does
# not parse; it needs to be its own statement.
library(ggplot2)
# Faceted scatter plots (one panel per training set) comparing the three
# post-processing settings. The colour values are named so that each setting
# always gets the colour used in the boxplot (None = orange, Median = red,
# Elbow = blue); unnamed values are assigned in alphabetical order of the
# labels, which swapped the None and Elbow colours.

# Training set Sensitivity
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "None"), pch = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "Median"), pch = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = PercentageTrainingSetSensitivity * 100, colour = "Elbow"), pch = 8) +
  scale_colour_manual(values = c(None = "orange", Median = "red", Elbow = "blue")) +
  ylim(0, 100) +
  theme_bw()

# Redfly Recovery
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "None"), pch = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "Median"), pch = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = PercentageRedflyRecovered * 100, colour = "Elbow"), pch = 8) +
  scale_colour_manual(values = c(None = "orange", Median = "red", Elbow = "blue")) +
  ylim(0, 100) +
  theme_bw()

# Pattern Precision
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "None"), pch = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "Median"), pch = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = percentageExpressionPatternPrecision * 100, colour = "Elbow"), pch = 8) +
  ylim(0, 100) +
  scale_colour_manual(values = c(None = "orange", Median = "red", Elbow = "blue")) +
  theme_bw()

# Pattern Precision permuted comparison
# NOTE(review): the None series plots percentageExpressionPatternRecall while
# Median and Elbow plot ExpectedpercentageExpressionPatternRecall -- confirm
# that this mix of columns is intended.
ggplot(top_none_imm) +
  facet_wrap(~TsetName) +
  geom_point(data = top_none_imm, aes(x = SCRMs, y = percentageExpressionPatternRecall * 100, colour = "None"), pch = 8) +
  geom_point(data = top_median_imm, aes(x = SCRMs, y = ExpectedpercentageExpressionPatternRecall * 100, colour = "Median"), pch = 8) +
  geom_point(data = top_elbow_imm, aes(x = SCRMs, y = ExpectedpercentageExpressionPatternRecall * 100, colour = "Elbow"), pch = 8) +
  ylim(0, 100) +
  scale_colour_manual(values = c(None = "orange", Median = "red", Elbow = "blue")) +
  theme_bw()
#random 29
# Compare the median-cutoff evaluation (top_median_imm) with the evaluation
# of the random training sets and summarise the differences per training set.
top_fake <- read.table(
  "/Users/hasibaasma/Box/NewTrainingSetsJan2020/R/random62oldRun/old/del_randomMEDIAN62sets_75times_Excel_withSpec.txt",
  header = TRUE
)
top_orig_newMed <- top_median_imm

# Sort both tables by training-set name so that their rows can be paired.
s1 <- top_orig_newMed[order(top_orig_newMed$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

# The columns below are combined purely by row position, so flag the case
# where the two sorted tables do not carry the same TsetName in every row.
if (!identical(as.character(s1$TsetName), as.character(s2$TsetName))) {
  warning(
    "TsetName differs between the actual and random tables; ",
    "row-wise differences may pair different training sets",
    call. = FALSE
  )
}

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  # Column 9: observed minus expected expression-pattern recall (s1 only).
  s1$percentageExpressionPatternRecall - s1$ExpectedpercentageExpressionPatternRecall,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  # Columns 15 and 16: actual minus random.
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)

##checking..creating ranges of differences to write in the observations
# Apply `stat` to one difference column within each training set and return a
# list named by training set, scaled to percent. This replaces nine
# copy-pasted subsetting expressions inside a list-growing loop.
summariseDiffByTset <- function(column, stat) {
  diffs <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tsetNames <- as.character(diffs$TsetName)
  tsets <- unique(tsetNames)
  lapply(
    setNames(tsets, tsets),
    function(tset) stat(diffs[[column]][tsetNames == tset]) * 100
  )
}

minimum_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", min)
maximum_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", max)
median_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", median)
minimum_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", median)
minimum_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", median)

library(qdapTools)
# Flatten each summary list into a data frame: value column first, list names
# in "TsetName".
minred <- list2df(minimum_differenceBetweenActualRandomRedflyRecovery, "minimum difference to random in redfly recovery", "TsetName")
medred <- list2df(median_differenceBetweenActualRandomRedflyRecovery, "median difference to random in redfly recovery", "TsetName")
maxred <- list2df(maximum_differenceBetweenActualRandomRedflyRecovery, "maximum difference to random in redfly recovery", "TsetName")
minTset <- list2df(minimum_DifferenceBetweenActualTsetRecovery, "minimum difference to random in Tset sensitivity", "TsetName")
maxTset <- list2df(maximum_DifferenceBetweenActualTsetRecovery, "maximum difference to random in Tset sensitivity", "TsetName")
medTset <- list2df(median_DifferenceBetweenActualTsetRecovery, "median difference to random in Tset sensitivity", "TsetName")
minSpec <- list2df(minimum_specificity, "minimum difference to random in specificity", "TsetName")
maxSpec <- list2df(maximum_specificity, "maximum difference to random in specificity", "TsetName")
medSpec <- list2df(median_specificity, "median difference to random in specificity", "TsetName")

# Join the three median tables on the training-set name and save the result.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)
write.table(dffinal, file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/newTsets/finaltable_new74sets_medianAmplitudeCurve.txt", sep = "\t")
library(knitr)
library(kableExtra)
library(dplyr)

Conditions being set for these good Tsets include:
finaltable <- list()
#
# Overall good training sets: every median difference must reach its cutoff
# (REDfly recovery >= 8, sensitivity >= 8, specificity >= 4).
dffinalDF <- as.data.frame(dffinal)
colnames(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceToPermutedInTsetSpecificity"
)
goodTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 8 &
    MedianDifferenceToPermutedInTsetSpecificity >= 4
)
goodTsetsOverAll <- list()
# Replace the internal column names with the labels shown in the report.
colnames(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetOverAll <- cbind(
  goodTsetOverAll[1],
  goodTsetOverAll[2],
  goodTsetOverAll[3],
  goodTsetOverAll[4]
)
kable_input3 <- kable(
  goodTsetOverAll,
  digits = 2,
  caption = "Overall Good Training Sets with TS and RR > 8 and SP > 4"
)
#column_spec(kable_input3,2:10,width = "2cm")
kable_input3

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 32.98 | 23.33 | 8.83 |
| adult_muscle | 24.87 | 35.00 | 10.89 |
| adult_somatic_muscle | 27.58 | 20.00 | 12.20 |
| blastoderm.mapping1 | 25.30 | 32.92 | 11.09 |
| embryonic_muscle | 31.37 | 21.29 | 4.17 |
| eye.mapping1 | 30.36 | 30.00 | 5.30 |
| mesectoderm.mapping1 | 23.68 | 25.00 | 4.51 |
#write.table(goodTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/goodTsetOverAll.txt",sep = "\t")

Conditions being set for these good Tsets (if we ignore specificity) include:
# Good training sets when specificity is ignored: REDfly recovery and
# sensitivity must both be at least 10 above random.
goodTsetsIgnoringSpecificity <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 10 &
    MedianDifferenceInTsetSensitivity >= 10
)
colnames(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- cbind(
  goodTsetsIgnoringSpecificity[1],
  goodTsetsIgnoringSpecificity[2],
  goodTsetsIgnoringSpecificity[3],
  goodTsetsIgnoringSpecificity[4]
)
kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity both > 10"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 32.98 | 23.33 | 8.83 |
| adult_muscle | 24.87 | 35.00 | 10.89 |
| adult_somatic_muscle | 27.58 | 20.00 | 12.20 |
| blastoderm.mapping1 | 25.30 | 32.92 | 11.09 |
| cardiac.mapping1 | 32.50 | 13.57 | 3.30 |
| dorsal_ectoderm.mapping1 | 20.73 | 19.37 | 2.16 |
| emb-larv_circulatory_system | 27.42 | 16.00 | 2.20 |
| emb-larv_hindgut | 24.66 | 45.71 | 2.70 |
| emb-larv_visceral | 23.90 | 15.56 | 0.78 |
| embryonic_midgut | 31.79 | 12.94 | 2.34 |
| embryonic_muscle | 31.37 | 21.29 | 4.17 |
| embryonic_somatic_muscle | 26.82 | 32.00 | 3.21 |
| embryonic_trachea | 21.54 | 22.50 | 3.33 |
| eye.mapping1 | 30.36 | 30.00 | 5.30 |
| fat_body.mapping1 | 12.75 | 28.75 | 2.59 |
| haltere_disc | 29.99 | 15.56 | 0.64 |
| mesectoderm.mapping1 | 23.68 | 25.00 | 4.51 |
| mesoderm.mapping1 | 28.11 | 17.78 | 3.25 |
| myoblast | 20.77 | 26.67 | 1.35 |
| ventral_ectoderm.mapping1 | 15.97 | 21.90 | 1.44 |
#write.table(goodTsetsIgnoringSpecificity,file="~/Box/NewTrainingSetsJan2020/R/goodTsetsIgnoringSpecificity.txt",sep = "\t")

These are the sets that fall into neither the very good nor the very bad category, mainly because they perform well in two measures but not in the third one. Sets that were already categorized as good are no longer excluded.
Specific conditions being set for these Intermediate sets include:
# Intermediate training sets: a set qualifies when it satisfies any one of
# three threshold combinations over the three median differences.
intermediateTSets <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
     MedianDifferenceInTsetSensitivity >= 8 &
     MedianDifferenceToPermutedInTsetSpecificity >= 0) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
       MedianDifferenceInTsetSensitivity >= 8 &
       MedianDifferenceToPermutedInTsetSpecificity >= 4) |
    (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
       MedianDifferenceInTsetSensitivity >= 0 &
       MedianDifferenceToPermutedInTsetSpecificity >= 4)
)
onlyintermediateTSets <- data.frame()
# The exclusion of sets already classified as good is disabled below.
# onlyintermediateTSetsNum <- which(!(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName))
# for (i in 1:length(onlyintermediateTSetsNum)){
# onlyintermediateTSetIter <- intermediateTSets[onlyintermediateTSetsNum[i],]
# onlyintermediateTSets <- rbind(onlyintermediateTSets,onlyintermediateTSetIter)
#
# }
#intermediateTSets <- onlyintermediateTSets
colnames(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- cbind(
  intermediateTSets[1],
  intermediateTSets[2],
  intermediateTSets[3],
  intermediateTSets[4]
)
kable_input41 <- kable(
  intermediateTSets,
  digits = 2,
  caption = "Intermediate Tsets with all 3 interm"
)
#column_spec(kable_input41,2:10,width = "3cm")
kable_input41

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 32.98 | 23.33 | 8.83 |
| adult_muscle | 24.87 | 35.00 | 10.89 |
| adult_pns | 0.94 | 20.00 | 5.06 |
| adult_somatic_muscle | 27.58 | 20.00 | 12.20 |
| amnioserosa.mapping1 | 31.40 | 7.37 | 5.41 |
| antennal_lobe | 20.40 | 8.39 | 0.99 |
| blastoderm.mapping1 | 25.30 | 32.92 | 11.09 |
| cardiac.mapping1 | 32.50 | 13.57 | 3.30 |
| dorsal_ectoderm.mapping1 | 20.73 | 19.37 | 2.16 |
| emb-larv_circulatory_system | 27.42 | 16.00 | 2.20 |
| emb-larv_excretory | 31.83 | 10.00 | 1.96 |
| emb-larv_foregut | 22.92 | 4.44 | 4.91 |
| emb-larv_hindgut | 24.66 | 45.71 | 2.70 |
| emb-larv_visceral | 23.90 | 15.56 | 0.78 |
| embryonic_epidermis | 37.52 | 6.67 | 4.70 |
| embryonic_midgut | 31.79 | 12.94 | 2.34 |
| embryonic_muscle | 31.37 | 21.29 | 4.17 |
| embryonic_pns | 26.26 | 10.00 | 2.49 |
| embryonic_sense_organ | 1.62 | 26.67 | 8.16 |
| embryonic_somatic_muscle | 26.82 | 32.00 | 3.21 |
| embryonic_trachea | 21.54 | 22.50 | 3.33 |
| eye.mapping1 | 30.36 | 30.00 | 5.30 |
| fat_body.mapping1 | 12.75 | 28.75 | 2.59 |
| haltere_disc | 29.99 | 15.56 | 0.64 |
| leg_disc | 21.28 | 10.00 | 1.76 |
| malpig.mapping1 | 33.72 | 10.00 | 2.85 |
| mesectoderm.mapping1 | 23.68 | 25.00 | 4.51 |
| mesoderm.mapping1 | 28.11 | 17.78 | 3.25 |
| myoblast | 20.77 | 26.67 | 1.35 |
| salivary.mapping1 | 26.37 | 2.11 | 5.81 |
| ventral_ectoderm.mapping1 | 15.97 | 21.90 | 1.44 |
#write.table(intermediateTSets,file="~/Box/NewTrainingSetsJan2020/R/intermediateTSets.txt",sep = "\t")

Conditions being set for these poor sets: if a set is poor in two categories, then it is a poor set.
# Poor training sets: a set is poor when at least two of its three median
# differences are at or below the cutoff (REDfly recovery <= 8,
# sensitivity <= 8, specificity <= 4).
poorTsetOverAll <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery <= 8 &
     MedianDifferenceInTsetSensitivity <= 8) |
    (MedianDifferenceInTsetSensitivity <= 8 &
       MedianDifferenceToPermutedInTsetSpecificity <= 4) |
    (MedianDifferenceToPermutedInTsetSpecificity <= 4 &
       MedianDifferenceToRandomOfRedflyRecovery <= 8)
)
colnames(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- cbind(
  poorTsetOverAll[1],
  poorTsetOverAll[2],
  poorTsetOverAll[3],
  poorTsetOverAll[4]
)
# The caption previously read "with any one below 0", which described a
# different rule from the two-of-three filter applied above.
kable_input6 <- kable(
  poorTsetOverAll,
  digits = 2,
  caption = "Overall Poor Training Sets with any two of RR <= 8, TS <= 8 and SP <= 4"
)
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_brain | 12.23 | -30.91 | 1.06 |
| adult_circulatory | 24.60 | 5.00 | 2.73 |
| adult_cns | 12.68 | -32.68 | 1.18 |
| adult_midgut | 22.27 | 7.37 | -0.31 |
| adult_nervous | 16.47 | -35.05 | 1.33 |
| adult_sense_organ | 9.52 | 3.75 | 2.76 |
| antenna | 18.67 | -30.00 | 0.96 |
| antennal_disc | 7.86 | -1.90 | 0.74 |
| disc.mapping1 | 22.36 | -6.67 | 2.24 |
| disc.mapping2 | 22.23 | 3.24 | 1.98 |
| ectoderm.mapping1 | 24.18 | -12.41 | 1.47 |
| ectoderm.mapping2 | 26.67 | -21.11 | 1.08 |
| emb-larv_fat_body | 7.57 | 35.00 | 2.78 |
| emb-larval_cns | 0.23 | -4.29 | 0.35 |
| emb-larval_mushroombody | 15.23 | 2.50 | 1.72 |
| emb-larval_neuron | 23.51 | -31.78 | 0.30 |
| emb-larval_opticlobe | 29.35 | 4.83 | 1.84 |
| embryonic_salivary | 1.91 | 31.43 | 2.29 |
| embryonic_ventral_nervous_system | 27.27 | -17.94 | 1.63 |
| endoderm.mapping1 | 29.93 | -5.38 | 2.02 |
| eye_disc | 18.57 | 2.86 | 2.13 |
| eye-antennal_disc | 22.21 | -2.22 | 3.26 |
| eye.mapping2 | 30.94 | -19.69 | 2.02 |
| female_reproductive | 18.67 | 8.00 | 2.94 |
| genital_disc | 28.00 | -4.29 | 1.40 |
| glia | 24.81 | -3.64 | 1.40 |
| glia.mapping1 | 21.17 | 2.50 | 2.00 |
| glia.mapping2 | 22.68 | 0.48 | 2.19 |
| gonad | 30.24 | -34.74 | 3.09 |
| imaginal_disc | 20.44 | -16.06 | 1.94 |
| leg | 25.76 | 0.00 | 1.82 |
| mesoderm.mapping2 | 35.07 | -31.10 | 2.16 |
| neuron | 23.04 | -34.81 | 0.52 |
| pns.mapping1 | 22.78 | 5.28 | 2.01 |
| reproductive.mapping2 | 29.73 | -35.65 | 3.33 |
| somatic_muscle.mapping1 | 29.34 | 7.83 | 3.57 |
| trachea.mapping1 | 21.62 | 1.67 | 1.55 |
| visceral.mapping1 | 27.43 | 6.34 | 2.78 |
| wing.mapping2 | 22.02 | -0.61 | 2.02 |
#write.table(poorTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/poorTsetOverAll.txt",sep = "\t")#random 29
# Compare the elbow-cutoff evaluation (top_elbow_imm) with the evaluation of
# the random training sets and summarise the differences per training set.
# NOTE(review): the random table read here is the "randomMEDIAN" file, the
# same one used for the median-cutoff comparison -- confirm that it is the
# intended baseline for the elbow results.
top_fake <- read.table(
  "/Users/hasibaasma/Box/NewTrainingSetsJan2020/R/random62oldRun/old/del_randomMEDIAN62sets_75times_Excel_withSpec.txt",
  header = TRUE
)
top_orig_newMed <- top_elbow_imm

# Sort both tables by training-set name so that their rows can be paired.
s1 <- top_orig_newMed[order(top_orig_newMed$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

# The columns below are combined purely by row position, so flag the case
# where the two sorted tables do not carry the same TsetName in every row.
if (!identical(as.character(s1$TsetName), as.character(s2$TsetName))) {
  warning(
    "TsetName differs between the actual and random tables; ",
    "row-wise differences may pair different training sets",
    call. = FALSE
  )
}

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  # Column 9: observed minus expected expression-pattern recall (s1 only).
  s1$percentageExpressionPatternRecall - s1$ExpectedpercentageExpressionPatternRecall,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  # Columns 15 and 16: actual minus random.
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)

##checking..creating ranges of differences to write in the observations
# Apply `stat` to one difference column within each training set and return a
# list named by training set, scaled to percent. This replaces nine
# copy-pasted subsetting expressions inside a list-growing loop.
summariseDiffByTset <- function(column, stat) {
  diffs <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tsetNames <- as.character(diffs$TsetName)
  tsets <- unique(tsetNames)
  lapply(
    setNames(tsets, tsets),
    function(tset) stat(diffs[[column]][tsetNames == tset]) * 100
  )
}

minimum_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", min)
maximum_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", max)
median_differenceBetweenActualRandomRedflyRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomRedflyRecovery", median)
minimum_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <- summariseDiffByTset("DifferenceBetweenActualRandomTsetRecovery", median)
minimum_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <- summariseDiffByTset("DifferenceBetweenActualRandomPercentageSpecificity", median)

library(qdapTools)
# Flatten each summary list into a data frame: value column first, list names
# in "TsetName".
minred <- list2df(minimum_differenceBetweenActualRandomRedflyRecovery, "minimum difference to random in redfly recovery", "TsetName")
medred <- list2df(median_differenceBetweenActualRandomRedflyRecovery, "median difference to random in redfly recovery", "TsetName")
maxred <- list2df(maximum_differenceBetweenActualRandomRedflyRecovery, "maximum difference to random in redfly recovery", "TsetName")
minTset <- list2df(minimum_DifferenceBetweenActualTsetRecovery, "minimum difference to random in Tset sensitivity", "TsetName")
maxTset <- list2df(maximum_DifferenceBetweenActualTsetRecovery, "maximum difference to random in Tset sensitivity", "TsetName")
medTset <- list2df(median_DifferenceBetweenActualTsetRecovery, "median difference to random in Tset sensitivity", "TsetName")
minSpec <- list2df(minimum_specificity, "minimum difference to random in specificity", "TsetName")
maxSpec <- list2df(maximum_specificity, "maximum difference to random in specificity", "TsetName")
medSpec <- list2df(median_specificity, "median difference to random in specificity", "TsetName")

# Join the three median tables on the training-set name and save the result.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)
write.table(dffinal, file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/newTsets/finaltable_new74sets_elbowAmplitudeCurve.txt", sep = "\t")
library(knitr)
library(kableExtra)
library(dplyr)

Conditions being set for these good Tsets include:
finaltable <- list()
#
# Overall good training sets: every median difference must reach its cutoff
# (REDfly recovery >= 8, sensitivity >= 8, specificity >= 4).
dffinalDF <- as.data.frame(dffinal)
colnames(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceToPermutedInTsetSpecificity"
)
goodTsetOverAll <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 8 &
    MedianDifferenceInTsetSensitivity >= 8 &
    MedianDifferenceToPermutedInTsetSpecificity >= 4
)
goodTsetsOverAll <- list()
# Replace the internal column names with the labels shown in the report.
colnames(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetOverAll <- cbind(
  goodTsetOverAll[1],
  goodTsetOverAll[2],
  goodTsetOverAll[3],
  goodTsetOverAll[4]
)
kable_input3 <- kable(
  goodTsetOverAll,
  digits = 2,
  caption = "Overall Good Training Sets with TS and RR > 8 and SP > 4"
)
#column_spec(kable_input3,2:10,width = "2cm")
kable_input3

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_muscle | 38.49 | 28.75 | 11.11 |
| adult_somatic_muscle | 41.32 | 20.00 | 12.20 |
| blastoderm.mapping1 | 47.60 | 22.50 | 6.62 |
#write.table(goodTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/goodTsetOverAll.txt",sep = "\t")

Conditions being set for these good Tsets (if we ignore specificity) include:
# Good training sets when specificity is ignored: REDfly recovery and
# sensitivity must both be at least 10 above random.
goodTsetsIgnoringSpecificity <- filter(
  dffinalDF,
  MedianDifferenceToRandomOfRedflyRecovery >= 10 &
    MedianDifferenceInTsetSensitivity >= 10
)
colnames(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- cbind(
  goodTsetsIgnoringSpecificity[1],
  goodTsetsIgnoringSpecificity[2],
  goodTsetsIgnoringSpecificity[3],
  goodTsetsIgnoringSpecificity[4]
)
kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity both > 10"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_muscle | 38.49 | 28.75 | 11.11 |
| adult_somatic_muscle | 41.32 | 20.00 | 12.20 |
| blastoderm.mapping1 | 47.60 | 22.50 | 6.62 |
| emb-larv_fat_body | 56.40 | 28.75 | 2.78 |
| emb-larv_hindgut | 42.66 | 38.57 | 2.77 |
| embryonic_salivary | 11.61 | 31.43 | -0.29 |
| eye.mapping1 | 56.40 | 20.00 | 3.18 |
| haltere_disc | 45.48 | 15.56 | 0.24 |
| myoblast | 48.23 | 15.56 | 0.94 |
| ventral_ectoderm.mapping1 | 45.71 | 21.90 | 1.57 |
#write.table(goodTsetsIgnoringSpecificity,file="~/Box/NewTrainingSetsJan2020/R/goodTsetsIgnoringSpecificity.txt",sep = "\t")

These are basically the sets that do not fall into the very good or very bad training sets, mainly because they perform well in two measures but not in the third one. Not anymore: excluded those sets which were already categorized as good.
Specific conditions being set for these intermediate sets include:
# Intermediate training sets. Each alternative below relaxes exactly one of the
# "good" cut-offs (8 / 8 / 4) to zero while keeping the other two, so a set is
# kept when at most one median falls short and that one is still non-negative.
# Sets meeting all three cut-offs also satisfy this filter.
intermediateTSets <- dffinalDF %>%
  filter(
    (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
       MedianDifferenceInTsetSensitivity >= 8 &
       MedianDifferenceToPermutedInTsetSpecificity >= 0) |
      (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
         MedianDifferenceInTsetSensitivity >= 8 &
         MedianDifferenceToPermutedInTsetSpecificity >= 4) |
      (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
         MedianDifferenceInTsetSensitivity >= 0 &
         MedianDifferenceToPermutedInTsetSpecificity >= 4)
  )
onlyintermediateTSets <- data.frame()
# Disabled: removal of the sets that are already listed in goodTsetOverAll.
# onlyintermediateTSetsNum <- which(!(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName))
# for (i in 1:length(onlyintermediateTSetsNum)){
# onlyintermediateTSetIter <- intermediateTSets[onlyintermediateTSetsNum[i],]
# onlyintermediateTSets <- rbind(onlyintermediateTSets,onlyintermediateTSetIter)
#
# }
#intermediateTSets <- onlyintermediateTSets

# Display headers for the rendered table; keep the first four columns.
names(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- intermediateTSets[1:4]
kable_input41 <- kable(
  intermediateTSets,
  digits = 2,
  caption = "Intermediate Tsets with all 3 interm"
)
#column_spec(kable_input41,2:10,width = "3cm")
kable_input41

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 47.22 | 6.67 | 7.66 |
| adult_muscle | 38.49 | 28.75 | 11.11 |
| adult_somatic_muscle | 41.32 | 20.00 | 12.20 |
| blastoderm.mapping1 | 47.60 | 22.50 | 6.62 |
| cardiac.mapping1 | 43.46 | 10.00 | 2.73 |
| emb-larv_fat_body | 56.40 | 28.75 | 2.78 |
| emb-larv_hindgut | 42.66 | 38.57 | 2.77 |
| embryonic_sense_organ | 3.26 | 21.90 | 4.87 |
| embryonic_trachea | 27.14 | 10.00 | 1.30 |
| eye.mapping1 | 56.40 | 20.00 | 3.18 |
| fat_body.mapping1 | 46.37 | 10.00 | 1.55 |
| haltere_disc | 45.48 | 15.56 | 0.24 |
| myoblast | 48.23 | 15.56 | 0.94 |
| ventral_ectoderm.mapping1 | 45.71 | 21.90 | 1.57 |
#write.table(intermediateTSets,file="~/Box/NewTrainingSetsJan2020/R/intermediateTSets.txt",sep = "\t")

Conditions being set for these poor sets include the following. If a set is poor in two categories, then it is a poor set:
#poor Tsets
# A set is classified as poor when at least two of its three medians are at or
# below the cut-offs (redfly recovery <= 8, Tset sensitivity <= 8,
# Tset specificity <= 4); the three alternatives below enumerate the pairs.
# NOTE(review): these comparisons and the ">=" comparisons of the good-set
# filter are both inclusive, so a median exactly on a cut-off counts as good
# and as poor for that measure - confirm which side the boundary belongs to.
poorTsetOverAll <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery <= 8 &
     MedianDifferenceInTsetSensitivity <= 8) |
    (MedianDifferenceInTsetSensitivity <= 8 &
       MedianDifferenceToPermutedInTsetSpecificity <= 4) |
    (MedianDifferenceToPermutedInTsetSpecificity <= 4 &
       MedianDifferenceToRandomOfRedflyRecovery <= 8)
)

# Display headers for the rendered table; keep the first four columns.
colnames(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- cbind(poorTsetOverAll[1], poorTsetOverAll[2], poorTsetOverAll[3], poorTsetOverAll[4])
# The caption previously read "any one below 0", which did not describe the
# filter above (two of three measures at cut-offs 8 / 8 / 4).
kable_input6 <- kable(
  poorTsetOverAll,
  digits = 2,
  caption = "Overall Poor Training Sets with at least two of RR <= 8, TS <= 8, SP <= 4"
)
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_brain | 14.42 | -36.10 | 0.81 |
| adult_circulatory | 40.87 | -5.00 | 0.00 |
| adult_cns | 16.33 | -36.34 | 0.89 |
| adult_foregut | 47.22 | 0.00 | -0.48 |
| adult_midgut | 45.39 | -3.16 | -0.15 |
| adult_nervous | 18.42 | -37.03 | 0.91 |
| adult_pns | 38.41 | 4.00 | 3.49 |
| adult_sense_organ | 25.33 | -12.92 | 1.15 |
| amnioserosa.mapping1 | 32.63 | -0.53 | 3.02 |
| antenna | 26.91 | -37.50 | 0.42 |
| antennal_disc | 9.44 | -6.67 | 0.43 |
| antennal_lobe | 31.54 | 1.94 | 0.43 |
| disc.mapping1 | 25.27 | -17.27 | 0.75 |
| disc.mapping2 | 35.75 | -4.86 | 0.37 |
| dorsal_ectoderm.mapping1 | 36.72 | 6.87 | 1.58 |
| ectoderm.mapping1 | 29.23 | -21.03 | 1.13 |
| ectoderm.mapping2 | 29.95 | -31.11 | 0.62 |
| emb-larv_circulatory_system | 47.49 | 4.00 | 0.94 |
| emb-larv_excretory | 46.89 | -4.29 | 0.45 |
| emb-larv_foregut | 30.50 | -1.11 | 1.27 |
| emb-larv_visceral | 32.35 | 4.44 | 0.23 |
| emb-larval_cns | 2.78 | -13.21 | -0.06 |
| emb-larval_mushroombody | 20.55 | -5.00 | 1.37 |
| emb-larval_neuron | 32.79 | -35.89 | 0.05 |
| emb-larval_opticlobe | 36.67 | -2.07 | 1.27 |
| embryonic_epidermis | 45.67 | 6.67 | 2.80 |
| embryonic_midgut | 36.11 | 1.18 | 1.34 |
| embryonic_muscle | 49.19 | 5.16 | 2.92 |
| embryonic_pns | 32.18 | -11.43 | 2.28 |
| embryonic_somatic_muscle | 34.78 | 8.00 | 2.61 |
| embryonic_ventral_nervous_system | 33.67 | -25.29 | 0.60 |
| endoderm.mapping1 | 35.43 | -22.69 | 0.53 |
| eye_disc | 29.63 | -5.71 | 1.72 |
| eye-antennal_disc | 33.94 | -8.89 | 1.88 |
| eye.mapping2 | 41.34 | -24.38 | 0.87 |
| female_gonad.mapping1 | 42.36 | -12.41 | 1.87 |
| female_reproductive | 25.26 | -4.00 | 1.56 |
| genital_disc | 30.98 | -7.86 | 2.06 |
| glia | 26.86 | -15.76 | 0.86 |
| glia.mapping1 | 31.62 | -2.50 | 1.94 |
| glia.mapping2 | 27.32 | -4.29 | 1.78 |
| gonad | 38.87 | -34.74 | 2.57 |
| imaginal_disc | 26.23 | -23.10 | 0.95 |
| leg | 27.90 | -7.50 | 0.44 |
| leg_disc | 21.94 | -1.54 | 0.52 |
| male_reproductive | 32.16 | -26.96 | 3.92 |
| malpig.mapping1 | 51.65 | -2.50 | 2.15 |
| mesectoderm.mapping1 | 47.08 | 5.00 | 3.38 |
| mesoderm.mapping1 | 39.21 | 6.67 | 1.43 |
| mesoderm.mapping2 | 39.32 | -36.58 | 1.05 |
| neuron | 31.16 | -36.10 | 0.02 |
| pns.mapping1 | 27.41 | -13.58 | 0.66 |
| reproductive.mapping2 | 32.42 | -35.65 | 2.15 |
| salivary.mapping1 | 33.61 | -18.95 | 1.27 |
| somatic_muscle.mapping1 | 35.94 | -7.39 | 2.24 |
| trachea.mapping1 | 35.75 | -2.50 | 1.20 |
| visceral.mapping1 | 37.11 | -0.98 | 1.95 |
| wing.mapping2 | 21.48 | -6.67 | 0.56 |
#write.table(poorTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/poorTsetOverAll.txt",sep = "\t")
#random 29
# Build the actual-vs-random comparison table: the IMM results (top_none_imm)
# are placed side by side with the random-set results read from disk, and the
# differences in Tset sensitivity, redfly recovery and expression-pattern
# recall (observed minus expected) are appended.
# NOTE(review): this is an absolute, user-specific path, unlike the "~/Box/..."
# paths used elsewhere in this file - confirm before running on another machine.
top_fake <- read.table(
  "/Users/hasibaasma/Box/NewTrainingSetsJan2020/R/random62oldRun/old/del_randomMEDIAN62sets_75times_Excel_withSpec.txt",
  header = TRUE
)
top_orig_newMed <- top_none_imm
s1 <- top_orig_newMed[order(top_orig_newMed$TsetName), ]
s2 <- top_fake[order(top_fake$TsetName), ]

# The two tables are paired purely by row position below, so they must list the
# same training sets in the same order. Without this check a length mismatch is
# either recycled silently or rows of different sets are subtracted.
if (nrow(s1) != nrow(s2) ||
    !identical(as.character(s1$TsetName), as.character(s2$TsetName))) {
  stop(
    "Actual (top_none_imm) and random (top_fake) tables do not line up by TsetName; ",
    "cannot compute row-wise differences.",
    call. = FALSE
  )
}

subsetIMMevaluationOutputContRand1000_Actual1000_diff <- cbind.data.frame(
  s1$TsetName,
  s1$Method,
  s1$TsetSize,
  s1$SCRMs,
  s1$TrainingSetRecovered,
  s1$PercentageTrainingSetSensitivity,
  s1$REDflyRecovered,
  s1$PercentageRedflyRecovered,
  s1$percentageExpressionPatternRecall - s1$ExpectedpercentageExpressionPatternRecall,
  s2$SCRMs,
  s2$TrainingSetRecovered,
  s2$PercentageTrainingSetSensitivity,
  s2$REDflyRecovered,
  s2$PercentageRedflyRecovered,
  s1$PercentageTrainingSetSensitivity - s2$PercentageTrainingSetSensitivity,
  s1$PercentageRedflyRecovered - s2$PercentageRedflyRecovered
)

# Only the columns used downstream get explicit names; the remaining columns
# keep the names generated by cbind.data.frame().
colnames(subsetIMMevaluationOutputContRand1000_Actual1000_diff)[c(1, 4, 9, 15, 16)] <- c(
  "TsetName",
  "SCRMs",
  "DifferenceBetweenActualRandomPercentageSpecificity",
  "DifferenceBetweenActualRandomTsetRecovery",
  "DifferenceBetweenActualRandomRedflyRecovery"
)
##checking..creating ranges of differences to write in the observations
# perTsetSummary() returns a list with one element per distinct TsetName in the
# comparison table: the chosen summary statistic of the chosen difference
# column for that set, multiplied by 100.
perTsetSummary <- function(differenceColumn, summaryFun) {
  diffTable <- subsetIMMevaluationOutputContRand1000_Actual1000_diff
  tsetIds <- as.character(unique(diffTable$TsetName))
  perSet <- lapply(tsetIds, function(tsetId) {
    summaryFun(diffTable[[differenceColumn]][diffTable$TsetName == tsetId]) * 100
  })
  names(perSet) <- tsetIds
  perSet
}

# Redfly recovery: actual minus random.
minimum_differenceBetweenActualRandomRedflyRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomRedflyRecovery", min)
median_differenceBetweenActualRandomRedflyRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomRedflyRecovery", median)
maximum_differenceBetweenActualRandomRedflyRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomRedflyRecovery", max)

# Training-set sensitivity: actual minus random.
minimum_DifferenceBetweenActualTsetRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomTsetRecovery", min)
maximum_DifferenceBetweenActualTsetRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomTsetRecovery", max)
median_DifferenceBetweenActualTsetRecovery <-
  perTsetSummary("DifferenceBetweenActualRandomTsetRecovery", median)

# Specificity column (expression-pattern recall minus its expected value).
minimum_specificity <-
  perTsetSummary("DifferenceBetweenActualRandomPercentageSpecificity", min)
maximum_specificity <-
  perTsetSummary("DifferenceBetweenActualRandomPercentageSpecificity", max)
median_specificity <-
  perTsetSummary("DifferenceBetweenActualRandomPercentageSpecificity", median)
library(qdapTools)

# Convert one per-set summary list into a data frame via list2df(), labelling
# the columns with the given description and "TsetName".
tsetListToFrame <- function(perSetList, valueLabel) {
  list2df(perSetList, valueLabel, "TsetName")
}

minred <- tsetListToFrame(
  minimum_differenceBetweenActualRandomRedflyRecovery,
  "minimum difference to random in redfly recovery"
)
medred <- tsetListToFrame(
  median_differenceBetweenActualRandomRedflyRecovery,
  "median difference to random in redfly recovery"
)
maxred <- tsetListToFrame(
  maximum_differenceBetweenActualRandomRedflyRecovery,
  "maximum difference to random in redfly recovery"
)
minTset <- tsetListToFrame(
  minimum_DifferenceBetweenActualTsetRecovery,
  "minimum difference to random in Tset sensitivity"
)
maxTset <- tsetListToFrame(
  maximum_DifferenceBetweenActualTsetRecovery,
  "maximum difference to random in Tset sensitivity"
)
medTset <- tsetListToFrame(
  median_DifferenceBetweenActualTsetRecovery,
  "median difference to random in Tset sensitivity"
)
minSpec <- tsetListToFrame(
  minimum_specificity,
  "minimum difference to random in specificity"
)
maxSpec <- tsetListToFrame(
  maximum_specificity,
  "maximum difference to random in specificity"
)
medSpec <- tsetListToFrame(
  median_specificity,
  "median difference to random in specificity"
)

# Only the three median tables go into the final per-set table, joined on
# TsetName; the result is also written to disk.
df1 <- merge(medred, medTset, by = "TsetName")
dffinal <- merge(df1, medSpec, by = "TsetName")
#dffinal<-merge(df7,by="TsetName")
#data.frame(unclass(table(dffinal)))
#table(dffinal)
write.table(
  dffinal,
  file = "~/Box/Old_and_newTsets_3postProcMethods_3categories/newTsets/finaltable_new74sets_noneAmplitudeCurve.txt",
  sep = "\t"
)
library(knitr)
library(kableExtra)
library(dplyr)

Conditions being set for these good Tsets include:
finaltable <- list()
#
#setting up conditions Good Tsets OverAll
# "Overall good" training sets: every one of the three per-set medians must
# reach its cut-off (redfly recovery >= 8, Tset sensitivity >= 8,
# Tset specificity >= 4). The comparisons are inclusive, although the table
# caption below is worded with ">".
dffinalDF <- as.data.frame(dffinal)
names(dffinalDF) <- c(
  "TsetName",
  "MedianDifferenceToRandomOfRedflyRecovery",
  "MedianDifferenceInTsetSensitivity",
  "MedianDifferenceToPermutedInTsetSpecificity"
)
goodTsetsOverAll <- list()
goodTsetOverAll <- dffinalDF %>%
  filter(
    MedianDifferenceToRandomOfRedflyRecovery >= 8,
    MedianDifferenceInTsetSensitivity >= 8,
    MedianDifferenceToPermutedInTsetSpecificity >= 4
  )

# Swap the internal column names for the headers shown in the rendered table
# and keep the first four columns.
names(goodTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetOverAll <- goodTsetOverAll[1:4]
kable_input3 <- kable(
  goodTsetOverAll,
  digits = 2,
  caption = "Overall Good Training Sets with TS and RR > 8 and SP > 4"
)
#column_spec(kable_input3,2:10,width = "2cm")
kable_input3

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 27.08 | 26.67 | 10.65 |
| adult_muscle | 23.12 | 35.00 | 10.89 |
| adult_somatic_muscle | 24.25 | 20.00 | 11.71 |
| amnioserosa.mapping1 | 27.46 | 10.00 | 5.78 |
| blastoderm.mapping1 | 17.27 | 35.00 | 14.10 |
| emb-larv_excretory | 26.98 | 17.14 | 6.25 |
| emb-larv_fat_body | 9.51 | 41.25 | 5.00 |
| emb-larv_hindgut | 19.44 | 45.71 | 5.11 |
| embryonic_muscle | 26.20 | 27.74 | 5.00 |
| embryonic_pns | 20.71 | 17.14 | 5.28 |
| embryonic_somatic_muscle | 23.45 | 36.00 | 4.67 |
| embryonic_trachea | 17.90 | 22.50 | 4.07 |
| eye_disc | 16.49 | 8.57 | 4.40 |
| eye.mapping1 | 23.31 | 30.00 | 4.11 |
| malpig.mapping1 | 27.28 | 10.00 | 6.92 |
| mesectoderm.mapping1 | 20.23 | 30.00 | 6.47 |
| mesoderm.mapping1 | 22.59 | 24.44 | 4.96 |
| somatic_muscle.mapping1 | 25.21 | 10.00 | 6.24 |
| visceral.mapping1 | 24.61 | 13.66 | 5.58 |
#write.table(goodTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/goodTsetOverAll.txt",sep = "\t")

Conditions being set for these good Tsets (if we ignore specificity) include:
# Good training sets when specificity is ignored: only the redfly-recovery and
# Tset-sensitivity medians are tested, each against an inclusive cut-off of 10.
goodTsetsIgnoringSpecificity <- dffinalDF %>%
  filter(
    MedianDifferenceToRandomOfRedflyRecovery >= 10,
    MedianDifferenceInTsetSensitivity >= 10
  )

# Display headers for the rendered table; keep the first four columns.
names(goodTsetsIgnoringSpecificity) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to permuted in Tset Specificity at cutoff"
)
goodTsetsIgnoringSpecificity <- goodTsetsIgnoringSpecificity[1:4]
kable_input4 <- kable(
  goodTsetsIgnoringSpecificity,
  digits = 2,
  caption = "Good Training Sets Ignoring Poor Specificity both > 10"
)
#column_spec(kable_input4,2:10,width = "2cm")
kable_input4

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to permuted in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 27.08 | 26.67 | 10.65 |
| adult_muscle | 23.12 | 35.00 | 10.89 |
| adult_somatic_muscle | 24.25 | 20.00 | 11.71 |
| antennal_lobe | 16.42 | 11.61 | 1.43 |
| blastoderm.mapping1 | 17.27 | 35.00 | 14.10 |
| cardiac.mapping1 | 25.05 | 13.57 | 3.97 |
| disc.mapping2 | 19.82 | 11.35 | 3.00 |
| dorsal_ectoderm.mapping1 | 18.97 | 19.37 | 3.65 |
| emb-larv_circulatory_system | 23.35 | 16.00 | 3.37 |
| emb-larv_excretory | 26.98 | 17.14 | 6.25 |
| emb-larv_hindgut | 19.44 | 45.71 | 5.11 |
| emb-larv_visceral | 21.06 | 15.56 | 0.91 |
| embryonic_midgut | 27.57 | 18.82 | 2.62 |
| embryonic_muscle | 26.20 | 27.74 | 5.00 |
| embryonic_pns | 20.71 | 17.14 | 5.28 |
| embryonic_somatic_muscle | 23.45 | 36.00 | 4.67 |
| embryonic_trachea | 17.90 | 22.50 | 4.07 |
| eye.mapping1 | 23.31 | 30.00 | 4.11 |
| female_reproductive | 11.33 | 12.00 | 3.33 |
| haltere_disc | 23.55 | 15.56 | 1.51 |
| leg_disc | 14.53 | 13.85 | 2.31 |
| mesectoderm.mapping1 | 20.23 | 30.00 | 6.47 |
| mesoderm.mapping1 | 22.59 | 24.44 | 4.96 |
| myoblast | 15.69 | 26.67 | 1.15 |
| ventral_ectoderm.mapping1 | 11.45 | 21.90 | 2.06 |
| visceral.mapping1 | 24.61 | 13.66 | 5.58 |
#write.table(goodTsetsIgnoringSpecificity,file="~/Box/NewTrainingSetsJan2020/R/goodTsetsIgnoringSpecificity.txt",sep = "\t")

These are basically the sets that do not fall into the very good or very bad training sets, mainly because they perform well in two measures but not in the third one. Not anymore: excluded those sets which were already categorized as good.
Specific conditions being set for these intermediate sets include:
# Intermediate training sets. Each alternative below relaxes exactly one of the
# "good" cut-offs (8 / 8 / 4) to zero while keeping the other two, so a set is
# kept when at most one median falls short and that one is still non-negative.
# Sets meeting all three cut-offs also satisfy this filter.
intermediateTSets <- dffinalDF %>%
  filter(
    (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
       MedianDifferenceInTsetSensitivity >= 8 &
       MedianDifferenceToPermutedInTsetSpecificity >= 0) |
      (MedianDifferenceToRandomOfRedflyRecovery >= 0 &
         MedianDifferenceInTsetSensitivity >= 8 &
         MedianDifferenceToPermutedInTsetSpecificity >= 4) |
      (MedianDifferenceToRandomOfRedflyRecovery >= 8 &
         MedianDifferenceInTsetSensitivity >= 0 &
         MedianDifferenceToPermutedInTsetSpecificity >= 4)
  )
onlyintermediateTSets <- data.frame()
# Disabled: removal of the sets that are already listed in goodTsetOverAll.
# onlyintermediateTSetsNum <- which(!(intermediateTSets$TsetName %in% goodTsetOverAll$TsetName))
# for (i in 1:length(onlyintermediateTSetsNum)){
# onlyintermediateTSetIter <- intermediateTSets[onlyintermediateTSetsNum[i],]
# onlyintermediateTSets <- rbind(onlyintermediateTSets,onlyintermediateTSetIter)
#
# }
#intermediateTSets <- onlyintermediateTSets

# Display headers for the rendered table; keep the first four columns.
names(intermediateTSets) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
intermediateTSets <- intermediateTSets[1:4]
kable_input41 <- kable(
  intermediateTSets,
  digits = 2,
  caption = "Intermediate Tsets with all 3 interm"
)
#column_spec(kable_input41,2:10,width = "3cm")
kable_input41

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_mesoderm.mapping1 | 27.08 | 26.67 | 10.65 |
| adult_muscle | 23.12 | 35.00 | 10.89 |
| adult_somatic_muscle | 24.25 | 20.00 | 11.71 |
| amnioserosa.mapping1 | 27.46 | 10.00 | 5.78 |
| antennal_lobe | 16.42 | 11.61 | 1.43 |
| blastoderm.mapping1 | 17.27 | 35.00 | 14.10 |
| cardiac.mapping1 | 25.05 | 13.57 | 3.97 |
| disc.mapping2 | 19.82 | 11.35 | 3.00 |
| dorsal_ectoderm.mapping1 | 18.97 | 19.37 | 3.65 |
| emb-larv_circulatory_system | 23.35 | 16.00 | 3.37 |
| emb-larv_excretory | 26.98 | 17.14 | 6.25 |
| emb-larv_fat_body | 9.51 | 41.25 | 5.00 |
| emb-larv_foregut | 21.61 | 7.22 | 6.17 |
| emb-larv_hindgut | 19.44 | 45.71 | 5.11 |
| emb-larv_visceral | 21.06 | 15.56 | 0.91 |
| emb-larval_opticlobe | 24.16 | 8.28 | 3.04 |
| embryonic_epidermis | 32.66 | 6.67 | 6.50 |
| embryonic_midgut | 27.57 | 18.82 | 2.62 |
| embryonic_muscle | 26.20 | 27.74 | 5.00 |
| embryonic_pns | 20.71 | 17.14 | 5.28 |
| embryonic_sense_organ | 0.44 | 26.67 | 9.61 |
| embryonic_somatic_muscle | 23.45 | 36.00 | 4.67 |
| embryonic_trachea | 17.90 | 22.50 | 4.07 |
| eye_disc | 16.49 | 8.57 | 4.40 |
| eye-antennal_disc | 18.65 | 2.22 | 4.70 |
| eye.mapping1 | 23.31 | 30.00 | 4.11 |
| fat_body.mapping1 | 7.47 | 35.00 | 4.31 |
| female_gonad.mapping1 | 14.79 | 1.38 | 4.13 |
| female_reproductive | 11.33 | 12.00 | 3.33 |
| haltere_disc | 23.55 | 15.56 | 1.51 |
| leg_disc | 14.53 | 13.85 | 2.31 |
| malpig.mapping1 | 27.28 | 10.00 | 6.92 |
| mesectoderm.mapping1 | 20.23 | 30.00 | 6.47 |
| mesoderm.mapping1 | 22.59 | 24.44 | 4.96 |
| myoblast | 15.69 | 26.67 | 1.15 |
| pns.mapping1 | 20.50 | 9.06 | 3.17 |
| salivary.mapping1 | 23.04 | 2.11 | 7.51 |
| somatic_muscle.mapping1 | 25.21 | 10.00 | 6.24 |
| ventral_ectoderm.mapping1 | 11.45 | 21.90 | 2.06 |
| visceral.mapping1 | 24.61 | 13.66 | 5.58 |
| wing.mapping2 | 18.33 | 8.48 | 3.40 |
#write.table(intermediateTSets,file="~/Box/NewTrainingSetsJan2020/R/intermediateTSets.txt",sep = "\t")

Conditions being set for these poor sets include the following. If a set is poor in two categories, then it is a poor set:
#poor Tsets
# A set is classified as poor when at least two of its three medians are at or
# below the cut-offs (redfly recovery <= 8, Tset sensitivity <= 8,
# Tset specificity <= 4); the three alternatives below enumerate the pairs.
# NOTE(review): these comparisons and the ">=" comparisons of the good-set
# filter are both inclusive, so a median exactly on a cut-off counts as good
# and as poor for that measure - confirm which side the boundary belongs to.
poorTsetOverAll <- filter(
  dffinalDF,
  (MedianDifferenceToRandomOfRedflyRecovery <= 8 &
     MedianDifferenceInTsetSensitivity <= 8) |
    (MedianDifferenceInTsetSensitivity <= 8 &
       MedianDifferenceToPermutedInTsetSpecificity <= 4) |
    (MedianDifferenceToPermutedInTsetSpecificity <= 4 &
       MedianDifferenceToRandomOfRedflyRecovery <= 8)
)

# Display headers for the rendered table; keep the first four columns.
colnames(poorTsetOverAll) <- c(
  "TsetName",
  "Difference to random in redfly recovery at cutoff",
  "Difference in Tset Sensitivity to random at cutoff",
  "Difference to random in Tset Specificity at cutoff"
)
poorTsetOverAll <- cbind(poorTsetOverAll[1], poorTsetOverAll[2], poorTsetOverAll[3], poorTsetOverAll[4])
# The caption previously read "any one below 0", which did not describe the
# filter above (two of three measures at cut-offs 8 / 8 / 4).
kable_input6 <- kable(
  poorTsetOverAll,
  digits = 2,
  caption = "Overall Poor Training Sets with at least two of RR <= 8, TS <= 8, SP <= 4"
)
#column_spec(kable_input6,2:10,width = "2cm")
kable_input6

| TsetName | Difference to random in redfly recovery at cutoff | Difference in Tset Sensitivity to random at cutoff | Difference to random in Tset Specificity at cutoff |
|---|---|---|---|
| adult_brain | 9.48 | -25.71 | 2.06 |
| adult_circulatory | 21.46 | 5.00 | 3.03 |
| adult_cns | 10.81 | -27.80 | 1.69 |
| adult_midgut | 21.91 | 7.37 | -1.54 |
| adult_nervous | 11.96 | -32.08 | 2.10 |
| adult_sense_organ | 6.89 | 10.00 | 3.85 |
| antenna | 17.32 | -23.75 | 1.58 |
| antennal_disc | 6.07 | 2.86 | 0.79 |
| disc.mapping1 | 19.35 | -5.15 | 3.39 |
| ectoderm.mapping1 | 21.87 | -7.24 | 1.60 |
| ectoderm.mapping2 | 23.02 | -15.56 | 2.19 |
| emb-larval_cns | -2.90 | 1.07 | 0.37 |
| emb-larval_mushroombody | 10.33 | 5.00 | 3.51 |
| emb-larval_neuron | 17.51 | -29.04 | 0.37 |
| embryonic_ventral_nervous_system | 19.30 | -12.06 | 2.17 |
| endoderm.mapping1 | 24.43 | -3.46 | 3.04 |
| eye.mapping2 | 26.70 | -10.31 | 3.85 |
| genital_disc | 22.82 | 6.43 | 1.42 |
| glia | 19.27 | 2.42 | 1.36 |
| glia.mapping1 | 17.07 | 5.00 | 3.61 |
| glia.mapping2 | 17.42 | 2.86 | 2.65 |
| imaginal_disc | 13.54 | -16.06 | 3.27 |
| leg | 21.55 | 5.00 | 3.95 |
| mesoderm.mapping2 | 28.51 | -29.04 | 3.76 |
| neuron | 16.04 | -32.21 | 0.43 |
| trachea.mapping1 | 18.05 | 5.83 | 0.74 |
#write.table(poorTsetOverAll,file="~/Box/NewTrainingSetsJan2020/R/poorTsetOverAll.txt",sep = "\t")